conda install pandas
Collecting package metadata (current_repodata.json): done
Solving environment: done
==> WARNING: A newer version of conda exists. <==
current version: 4.10.3
latest version: 23.1.0
Please update conda by running
$ conda update -n base -c defaults conda
# All requested packages already installed.
Note: you may need to restart the kernel to use updated packages.
import pandas as pd
import os
path = os.path.join("data","BA_reviews.csv")
if os.path.isfile(path):
data = pd.read_csv(path)
else:
print(f"file {path} does not exist")
data = pd.read_csv("data/BA_reviews.csv")
print(data)
Unnamed: 0 reviews 0 0 ✅ Trip Verified | Excellent service both on th... 1 1 ✅ Trip Verified | Good lounge at Cape Town. O... 2 2 ✅ Trip Verified | A really excellent journey.... 3 3 ✅ Trip Verified | This flight was one of the ... 4 4 Not Verified | It seems that there is a race t... .. ... ... 995 995 ✅ Trip Verified | Flew British Airways from Lo... 996 996 ✅ Trip Verified | Madrid to London. The main ... 997 997 ✅ Trip Verified | London to Moscow. British A... 998 998 ✅ Trip Verified | Miami to London. My most re... 999 999 ✅ Trip Verified | Gatwick to Barbados in Dece... [1000 rows x 2 columns]
data.describe(include='all')
| Unnamed: 0 | reviews | |
|---|---|---|
| count | 1000.000000 | 1000 |
| unique | NaN | 1000 |
| top | NaN | ✅ Trip Verified | Excellent service both on th... |
| freq | NaN | 1 |
| mean | 499.500000 | NaN |
| std | 288.819436 | NaN |
| min | 0.000000 | NaN |
| 25% | 249.750000 | NaN |
| 50% | 499.500000 | NaN |
| 75% | 749.250000 | NaN |
| max | 999.000000 | NaN |
data[data['reviews'].str.contains("Thailand")]
| Unnamed: 0 | reviews | |
|---|---|---|
| 777 | 777 | ✅ Trip Verified | I had flown British Airways ... |
| 841 | 841 | ✅ Trip Verified | London to Bangkok. Flew Bri... |
data.rename(columns={'Unnamed':'trip_verification'}, inplace=True)
data = pd.read_csv("data/BA_reviews.csv")
data.head()
| Unnamed: 0 | reviews | |
|---|---|---|
| 0 | 0 | ✅ Trip Verified | Excellent service both on th... |
| 1 | 1 | ✅ Trip Verified | Good lounge at Cape Town. O... |
| 2 | 2 | ✅ Trip Verified | A really excellent journey.... |
| 3 | 3 | ✅ Trip Verified | This flight was one of the ... |
| 4 | 4 | Not Verified | It seems that there is a race t... |
print(data)
Unnamed: 0 reviews 0 0 ✅ Trip Verified | Excellent service both on th... 1 1 ✅ Trip Verified | Good lounge at Cape Town. O... 2 2 ✅ Trip Verified | A really excellent journey.... 3 3 ✅ Trip Verified | This flight was one of the ... 4 4 Not Verified | It seems that there is a race t... .. ... ... 995 995 ✅ Trip Verified | Flew British Airways from Lo... 996 996 ✅ Trip Verified | Madrid to London. The main ... 997 997 ✅ Trip Verified | London to Moscow. British A... 998 998 ✅ Trip Verified | Miami to London. My most re... 999 999 ✅ Trip Verified | Gatwick to Barbados in Dece... [1000 rows x 2 columns]
pip install country_list
Requirement already satisfied: country_list in ./opt/anaconda3/lib/python3.9/site-packages (1.0.0) Note: you may need to restart the kernel to use updated packages.
from country_list import available_languages, countries_for_language
for language in available_languages():
print(language)
break
af
countries = dict(countries_for_language('en'))
print(countries['TH'])
Thailand
# Get a list of country names for a given language (in this case, English)
country_list = [country[1] for country in countries_for_language('en')]
# Create a function to extract country names
def extract_country(reviews):
# Loop through the list of countries and check if the review contains a country name
for country in country_list:
if country in reviews:
return country
return None
data['countries'] = data['reviews'].apply(extract_country)
# Show the updated data frame
print(data)
Unnamed: 0 reviews countries 0 0 ✅ Trip Verified | Excellent service both on th... None 1 1 ✅ Trip Verified | Good lounge at Cape Town. O... None 2 2 ✅ Trip Verified | A really excellent journey.... None 3 3 ✅ Trip Verified | This flight was one of the ... None 4 4 Not Verified | It seems that there is a race t... None .. ... ... ... 995 995 ✅ Trip Verified | Flew British Airways from Lo... None 996 996 ✅ Trip Verified | Madrid to London. The main ... None 997 997 ✅ Trip Verified | London to Moscow. British A... None 998 998 ✅ Trip Verified | Miami to London. My most re... None 999 999 ✅ Trip Verified | Gatwick to Barbados in Dece... Barbados [1000 rows x 3 columns]
print(data.tail(5))
Unnamed: 0 reviews countries 995 995 ✅ Trip Verified | Flew British Airways from Lo... None 996 996 ✅ Trip Verified | Madrid to London. The main ... None 997 997 ✅ Trip Verified | London to Moscow. British A... None 998 998 ✅ Trip Verified | Miami to London. My most re... None 999 999 ✅ Trip Verified | Gatwick to Barbados in Dece... Barbados
data[data['reviews'].str.contains("London")]
| Unnamed: 0 | reviews | countries | |
|---|---|---|---|
| 7 | 7 | ✅ Trip Verified | Easy check in and staff mem... | None |
| 20 | 20 | ✅ Trip Verified | Absolutely terrible experie... | None |
| 21 | 21 | ✅ Trip Verified | Vancouver to Delhi via Lond... | None |
| 25 | 25 | Not Verified | BA cancelled my flight home, t... | None |
| 26 | 26 | ✅ Trip Verified | Turned up 3.5 hours in advan... | None |
| ... | ... | ... | ... |
| 994 | 994 | ✅ Trip Verified | Worst BA flight ever! Flew T... | None |
| 995 | 995 | ✅ Trip Verified | Flew British Airways from Lo... | None |
| 996 | 996 | ✅ Trip Verified | Madrid to London. The main ... | None |
| 997 | 997 | ✅ Trip Verified | London to Moscow. British A... | None |
| 998 | 998 | ✅ Trip Verified | Miami to London. My most re... | None |
558 rows × 3 columns
data["trip verification"] = data.reviews.str[:15]
data.tail()
| Unnamed: 0 | reviews | countries | trip verification | |
|---|---|---|---|---|
| 995 | 995 | ✅ Trip Verified | Flew British Airways from Lo... | None | ✅ Trip Verified |
| 996 | 996 | ✅ Trip Verified | Madrid to London. The main ... | None | ✅ Trip Verified |
| 997 | 997 | ✅ Trip Verified | London to Moscow. British A... | None | ✅ Trip Verified |
| 998 | 998 | ✅ Trip Verified | Miami to London. My most re... | None | ✅ Trip Verified |
| 999 | 999 | ✅ Trip Verified | Gatwick to Barbados in Dece... | Barbados | ✅ Trip Verified |
data['reviews'] = data['reviews'].str.replace('✅ Trip Verified', '')
data.tail()
| Unnamed: 0 | reviews | countries | trip verification | |
|---|---|---|---|---|
| 995 | 995 | | Flew British Airways from London Heathrow t... | None | ✅ Trip Verified |
| 996 | 996 | | Madrid to London. The main plus about this... | None | ✅ Trip Verified |
| 997 | 997 | | London to Moscow. British Airways has down... | None | ✅ Trip Verified |
| 998 | 998 | | Miami to London. My most recent BA experie... | None | ✅ Trip Verified |
| 999 | 999 | | Gatwick to Barbados in December 2017. On a... | Barbados | ✅ Trip Verified |
data['reviews'] = data['reviews'].str.replace('Not Verified', '')
data.tail()
| Unnamed: 0 | reviews | countries | trip verification | |
|---|---|---|---|---|
| 995 | 995 | | Flew British Airways from London Heathrow t... | None | ✅ Trip Verified |
| 996 | 996 | | Madrid to London. The main plus about this... | None | ✅ Trip Verified |
| 997 | 997 | | London to Moscow. British Airways has down... | None | ✅ Trip Verified |
| 998 | 998 | | Miami to London. My most recent BA experie... | None | ✅ Trip Verified |
| 999 | 999 | | Gatwick to Barbados in December 2017. On a... | Barbados | ✅ Trip Verified |
data.to_csv("modified_data.csv", index=False)
#questions for data: mention all of the times the British airlines is mentioned
# Count up all of the positive reviews with words such as good great excellent
print(data)
Unnamed: 0 reviews countries \
0 0 | Excellent service both on the ground and on... None
1 1 | Good lounge at Cape Town. On time departur... None
2 2 | A really excellent journey. Lounge not ove... None
3 3 | This flight was one of the worst I have ev... None
4 4 | It seems that there is a race to the bottom... None
.. ... ... ...
995 995 | Flew British Airways from London Heathrow t... None
996 996 | Madrid to London. The main plus about this... None
997 997 | London to Moscow. British Airways has down... None
998 998 | Miami to London. My most recent BA experie... None
999 999 | Gatwick to Barbados in December 2017. On a... Barbados
trip verification
0 ✅ Trip Verified
1 ✅ Trip Verified
2 ✅ Trip Verified
3 ✅ Trip Verified
4 Not Verified |
.. ...
995 ✅ Trip Verified
996 ✅ Trip Verified
997 ✅ Trip Verified
998 ✅ Trip Verified
999 ✅ Trip Verified
[1000 rows x 4 columns]
pip install gensim
Requirement already satisfied: gensim in ./opt/anaconda3/lib/python3.9/site-packages (4.3.0) Requirement already satisfied: scipy>=1.7.0 in ./opt/anaconda3/lib/python3.9/site-packages (from gensim) (1.7.1) Requirement already satisfied: numpy>=1.18.5 in ./opt/anaconda3/lib/python3.9/site-packages (from gensim) (1.20.3) Requirement already satisfied: FuzzyTM>=0.4.0 in ./opt/anaconda3/lib/python3.9/site-packages (from gensim) (2.0.5) Requirement already satisfied: smart-open>=1.8.1 in ./opt/anaconda3/lib/python3.9/site-packages (from gensim) (6.3.0) Requirement already satisfied: pandas in ./opt/anaconda3/lib/python3.9/site-packages (from FuzzyTM>=0.4.0->gensim) (1.3.4) Requirement already satisfied: pyfume in ./opt/anaconda3/lib/python3.9/site-packages (from FuzzyTM>=0.4.0->gensim) (0.2.25) Requirement already satisfied: python-dateutil>=2.7.3 in ./opt/anaconda3/lib/python3.9/site-packages (from pandas->FuzzyTM>=0.4.0->gensim) (2.8.2) Requirement already satisfied: pytz>=2017.3 in ./opt/anaconda3/lib/python3.9/site-packages (from pandas->FuzzyTM>=0.4.0->gensim) (2021.3) Requirement already satisfied: six>=1.5 in ./opt/anaconda3/lib/python3.9/site-packages (from python-dateutil>=2.7.3->pandas->FuzzyTM>=0.4.0->gensim) (1.16.0) Requirement already satisfied: simpful in ./opt/anaconda3/lib/python3.9/site-packages (from pyfume->FuzzyTM>=0.4.0->gensim) (2.9.0) Requirement already satisfied: fst-pso in ./opt/anaconda3/lib/python3.9/site-packages (from pyfume->FuzzyTM>=0.4.0->gensim) (1.8.1) Requirement already satisfied: miniful in ./opt/anaconda3/lib/python3.9/site-packages (from fst-pso->pyfume->FuzzyTM>=0.4.0->gensim) (0.0.6) Requirement already satisfied: requests in ./opt/anaconda3/lib/python3.9/site-packages (from simpful->pyfume->FuzzyTM>=0.4.0->gensim) (2.26.0) Requirement already satisfied: idna<4,>=2.5 in ./opt/anaconda3/lib/python3.9/site-packages (from requests->simpful->pyfume->FuzzyTM>=0.4.0->gensim) (3.2) Requirement already satisfied: urllib3<1.27,>=1.21.1 in ./opt/anaconda3/lib/python3.9/site-packages (from requests->simpful->pyfume->FuzzyTM>=0.4.0->gensim) (1.26.7) Requirement already satisfied: charset-normalizer~=2.0.0 in ./opt/anaconda3/lib/python3.9/site-packages (from requests->simpful->pyfume->FuzzyTM>=0.4.0->gensim) (2.0.4) Requirement already satisfied: certifi>=2017.4.17 in ./opt/anaconda3/lib/python3.9/site-packages (from requests->simpful->pyfume->FuzzyTM>=0.4.0->gensim) (2021.10.8) Note: you may need to restart the kernel to use updated packages.
import gensim
from gensim.utils import simple_preprocess
from gensim.parsing.preprocessing import STOPWORDS
from gensim import corpora
# Define a function to preprocess the review data
def preprocess(reviews):
return [word for word in simple_preprocess(reviews) if word not in STOPWORDS]
# Preprocess the reviews data
processed_reviews = data['reviews'].map(preprocess)
# Create a dictionary from the processed reviews data
dictionary = corpora.Dictionary(processed_reviews)
# Create a bag-of-words representation of the processed reviews data
bow_corpus = [dictionary.doc2bow(reviews) for reviews in processed_reviews]
# Train the topic modeling algorithm using the bag-of-words corpus
lda_model = gensim.models.LdaMulticore(bow_corpus, num_topics=10, id2word=dictionary, passes=2, workers=2)
# Print the top 10 keywords for each of the 10 topics generated
for idx, topic in lda_model.print_topics(-1):
print("Topic: {} \nWords: {}".format(idx, topic))
Topic: 0 Words: 0.036*"flight" + 0.021*"ba" + 0.009*"staff" + 0.009*"service" + 0.009*"london" + 0.008*"seat" + 0.008*"check" + 0.007*"food" + 0.007*"time" + 0.006*"plane" Topic: 1 Words: 0.024*"flight" + 0.012*"seats" + 0.012*"seat" + 0.011*"ba" + 0.010*"class" + 0.009*"london" + 0.009*"good" + 0.008*"business" + 0.008*"service" + 0.007*"great" Topic: 2 Words: 0.024*"flight" + 0.012*"ba" + 0.012*"airways" + 0.011*"london" + 0.011*"british" + 0.009*"good" + 0.009*"time" + 0.008*"service" + 0.008*"food" + 0.008*"airline" Topic: 3 Words: 0.022*"flight" + 0.016*"ba" + 0.010*"london" + 0.007*"service" + 0.007*"airport" + 0.006*"customer" + 0.006*"told" + 0.005*"asked" + 0.005*"said" + 0.005*"british" Topic: 4 Words: 0.022*"flight" + 0.016*"ba" + 0.012*"cabin" + 0.010*"service" + 0.010*"crew" + 0.010*"seats" + 0.010*"london" + 0.009*"food" + 0.009*"seat" + 0.008*"business" Topic: 5 Words: 0.023*"flight" + 0.016*"service" + 0.012*"ba" + 0.010*"london" + 0.009*"class" + 0.008*"business" + 0.007*"time" + 0.007*"food" + 0.007*"check" + 0.007*"crew" Topic: 6 Words: 0.032*"flight" + 0.017*"ba" + 0.013*"service" + 0.009*"london" + 0.008*"seat" + 0.007*"staff" + 0.007*"time" + 0.007*"crew" + 0.007*"hours" + 0.006*"aircraft" Topic: 7 Words: 0.019*"ba" + 0.015*"flight" + 0.015*"class" + 0.012*"business" + 0.012*"seat" + 0.012*"food" + 0.011*"good" + 0.009*"service" + 0.009*"club" + 0.007*"cabin" Topic: 8 Words: 0.018*"ba" + 0.016*"flight" + 0.012*"crew" + 0.010*"london" + 0.010*"service" + 0.009*"time" + 0.009*"heathrow" + 0.008*"cabin" + 0.007*"food" + 0.007*"good" Topic: 9 Words: 0.018*"flight" + 0.016*"ba" + 0.010*"london" + 0.009*"seats" + 0.007*"service" + 0.007*"food" + 0.006*"heathrow" + 0.006*"crew" + 0.005*"class" + 0.005*"time"
pip install wordcloud
Requirement already satisfied: wordcloud in ./opt/anaconda3/lib/python3.9/site-packages (1.8.2.2) Requirement already satisfied: numpy>=1.6.1 in ./opt/anaconda3/lib/python3.9/site-packages (from wordcloud) (1.20.3) Requirement already satisfied: pillow in ./opt/anaconda3/lib/python3.9/site-packages (from wordcloud) (8.4.0) Requirement already satisfied: matplotlib in ./opt/anaconda3/lib/python3.9/site-packages (from wordcloud) (3.4.3) Requirement already satisfied: cycler>=0.10 in ./opt/anaconda3/lib/python3.9/site-packages (from matplotlib->wordcloud) (0.10.0) Requirement already satisfied: python-dateutil>=2.7 in ./opt/anaconda3/lib/python3.9/site-packages (from matplotlib->wordcloud) (2.8.2) Requirement already satisfied: kiwisolver>=1.0.1 in ./opt/anaconda3/lib/python3.9/site-packages (from matplotlib->wordcloud) (1.3.1) Requirement already satisfied: pyparsing>=2.2.1 in ./opt/anaconda3/lib/python3.9/site-packages (from matplotlib->wordcloud) (3.0.4) Requirement already satisfied: six in ./opt/anaconda3/lib/python3.9/site-packages (from cycler>=0.10->matplotlib->wordcloud) (1.16.0) Note: you may need to restart the kernel to use updated packages.
from wordcloud import WordCloud
import matplotlib.pyplot as plt
reviews = data['reviews']
reviews_text = " ".join(review for review in reviews)
wordcloud = WordCloud(width=800, height=800, min_font_size=10).generate(reviews_text)
plt.figure(figsize=(8, 8), facecolor=None)
plt.imshow(wordcloud)
plt.axis("off")
plt.tight_layout(pad=0)
plt.show()
lda_model.save('model_file.lda')
print(data)
Unnamed: 0 reviews countries \
0 0 | Excellent service both on the ground and on... None
1 1 | Good lounge at Cape Town. On time departur... None
2 2 | A really excellent journey. Lounge not ove... None
3 3 | This flight was one of the worst I have ev... None
4 4 | It seems that there is a race to the bottom... None
.. ... ... ...
995 995 | Flew British Airways from London Heathrow t... None
996 996 | Madrid to London. The main plus about this... None
997 997 | London to Moscow. British Airways has down... None
998 998 | Miami to London. My most recent BA experie... None
999 999 | Gatwick to Barbados in December 2017. On a... Barbados
trip verification
0 ✅ Trip Verified
1 ✅ Trip Verified
2 ✅ Trip Verified
3 ✅ Trip Verified
4 Not Verified |
.. ...
995 ✅ Trip Verified
996 ✅ Trip Verified
997 ✅ Trip Verified
998 ✅ Trip Verified
999 ✅ Trip Verified
[1000 rows x 4 columns]
from collections import Counter
import pandas as pd
# Assuming that 'data' is a DataFrame with a column named 'text' containing text data
words = Counter(" ".join(data['reviews']).split())
most_common_words = words.most_common(10) # Returns the 10 most common words
print(most_common_words)
[('the', 6210), ('to', 5110), ('and', 4388), ('was', 3380), ('a', 3151), ('I', 2993), ('of', 1985), ('in', 1865), ('on', 1667), ('for', 1594)]
# Assuming 'model' is the trained topic model
num_words = 10 # Number of most common words to print for each topic
topics = lda_model.show_topics(num_topics=-1, num_words=num_words, formatted=False)
for topic in topics:
print(f"Topic {topic[0]}: ")
for word, weight in topic[1]:
print(f"\t{word} ({weight:.2f})")
Topic 0: flight (0.04) ba (0.02) staff (0.01) service (0.01) london (0.01) seat (0.01) check (0.01) food (0.01) time (0.01) plane (0.01) Topic 1: flight (0.02) seats (0.01) seat (0.01) ba (0.01) class (0.01) london (0.01) good (0.01) business (0.01) service (0.01) great (0.01) Topic 2: flight (0.02) ba (0.01) airways (0.01) london (0.01) british (0.01) good (0.01) time (0.01) service (0.01) food (0.01) airline (0.01) Topic 3: flight (0.02) ba (0.02) london (0.01) service (0.01) airport (0.01) customer (0.01) told (0.01) asked (0.01) said (0.00) british (0.00) Topic 4: flight (0.02) ba (0.02) cabin (0.01) service (0.01) crew (0.01) seats (0.01) london (0.01) food (0.01) seat (0.01) business (0.01) Topic 5: flight (0.02) service (0.02) ba (0.01) london (0.01) class (0.01) business (0.01) time (0.01) food (0.01) check (0.01) crew (0.01) Topic 6: flight (0.03) ba (0.02) service (0.01) london (0.01) seat (0.01) staff (0.01) time (0.01) crew (0.01) hours (0.01) aircraft (0.01) Topic 7: ba (0.02) flight (0.02) class (0.02) business (0.01) seat (0.01) food (0.01) good (0.01) service (0.01) club (0.01) cabin (0.01) Topic 8: ba (0.02) flight (0.02) crew (0.01) london (0.01) service (0.01) time (0.01) heathrow (0.01) cabin (0.01) food (0.01) good (0.01) Topic 9: flight (0.02) ba (0.02) london (0.01) seats (0.01) service (0.01) food (0.01) heathrow (0.01) crew (0.01) class (0.01) time (0.01)
# Assuming 'model' is the trained topic model
num_words = 10 # Number of most common words to print for each topic
topics = lda_model.show_topics(num_topics=-1, num_words=num_words, formatted=False)
fig, ax = plt.subplots(figsize=(10, 8))
for topic in topics:
words = [word for word, weight in topic[1]]
weights = [weight for word, weight in topic[1]]
ax.bar(words, weights, alpha=0.8, label=f"Topic {topic[0]}")
ax.set_xlabel("Word")
ax.set_ylabel("Weight")
ax.set_title("Top Words by Topic")
ax.legend()
plt.xticks(rotation=45)
plt.show()
# Extract the value counts for the "trip verification" column
counts = data["trip verification"].value_counts()
fig, ax = plt.subplots(figsize=(6, 6))
ax.bar(counts.index, counts.values, alpha=0.8)
ax.set_xlabel("Verification Status")
ax.set_ylabel("Count")
ax.set_title("Trip Verification Counts")
plt.show()
/Users/ashleyvizcaino/opt/anaconda3/lib/python3.9/site-packages/matplotlib/backends/backend_agg.py:240: RuntimeWarning: Glyph 9989 missing from current font. font.set_text(s, 0.0, flags=flags) /Users/ashleyvizcaino/opt/anaconda3/lib/python3.9/site-packages/matplotlib/backends/backend_agg.py:240: RuntimeWarning: Glyph 10062 missing from current font. font.set_text(s, 0.0, flags=flags) /Users/ashleyvizcaino/opt/anaconda3/lib/python3.9/site-packages/matplotlib/backends/backend_agg.py:203: RuntimeWarning: Glyph 9989 missing from current font. font.set_text(s, 0, flags=flags) /Users/ashleyvizcaino/opt/anaconda3/lib/python3.9/site-packages/matplotlib/backends/backend_agg.py:203: RuntimeWarning: Glyph 10062 missing from current font. font.set_text(s, 0, flags=flags)
#if the trip is verfied is is more likley to be a negative review
negative_words = ["bad", "not good"]
negative_reviews = data.loc[(data["trip verification"] != "trip verified") & (data["reviews"].str.contains("|".join(negative_words))), "reviews"]
for review in negative_reviews:
print(review)
break
| Vancouver to Delhi via London. We were booked to fly from Vancouver to New Delhi via London Heathrow on Dec 22nd. We received an email on Dec 20th informing us about the industrial action in the UK. I called to find out how it may impact our travel. The representative kind of scared us of being possibly stranded during our travel and offered us a full refund. We called again to confirm about other options but he did not offer any other solutions. We asked to cancel our tickets and he told us about the cancellation fee. We booked new tickets with another airline with double the cost as our trip was urgent (simultaneously). He told us he no longer can cancel our tickets on the system but promised us he will make it happen because he told us we would get a full refund. He processed the cancellation. 5 hours later he emailed us that the tickets couldn’t be cancelled and suggested we cancel our new tickets once he found out they were with another airline (we lost $1000 from cancelling the new tickets). Now, the representative claimed that he never said there won’t be any services available and asked us why we were worrying about the industrial action, and that other “arrangements” will be made if there were any issues (this was new information never given to us before). We kept our British Airway tickets. The next day, we asked to speak with the manager and report that we were misguided about the whole situation. We never heard back from the manager. As we logged in to check-in 15 hours before take off, we found that our VAN — LHR flight was cancelled (due to the bad weather in Vancouver). We did not receive any emails from the airport nor the airline about this change. We called BA again and asked about rearrangements for our flight, to which they offered rebooking with an additional $700-1000 per person price difference and the dates offered were about 7 days after our original flight was supposed to leave. Additionally, to cancel the rest of the flights with them which we could not reach since our first flight was cancelled, we had to pay about $1000 cancellation fee to get a refund. We were very frustrated with the overall experience and no accommodations nor accountability as they wanted to charge us for any service or changes possible. We were on the phone for 1.5 hours while other flights that we could have booked were getting sold out - the customer service person would take 15-20 minutes (for real) to search up each question or option we asked about. It has been a frustrating experience - we lost time and money and it ruined our holidays completely. We then demanded to speak to the manager, who again did not want to take any accountability for their team and gave us a small discount on the cancellation fee. In total, we were on the phone with them for 2 days back and forth, without any resolutions. We felt misguided and misinformed with their unfair policies towards their customers. We did not feel taken care of at all. We had to pay the price for the misinformation and the flights we did not cancel ourselves. After all this trouble, we spent Christmas without our family and the missed the urgent reasons we were travelling for.
import nltk
from nltk.tokenize import word_tokenize
from collections import Counter
# Tokenize the words in the "review" column and convert to lowercase
all_words = [word.lower() for review in data["reviews"] for word in word_tokenize(review)]
# Define a set of negative words to search for
negative_words = {"bad", "not good", "poor", "terrible", "disappointing", "disappointed", "awful", "horrible", "miserable", "stupid", "sad","nothappy"}
# Count the frequency of each negative word in the "review" column
negative_word_counts = Counter([word for word in all_words if word in negative_words])
# Create a list of negative words and their frequencies, sorted in descending order by frequency
negative_word_list = sorted(negative_word_counts.items(), key=lambda x: x[1], reverse=True)
# Print the list of negative words and their frequencies
for word, count in negative_word_list:
print(f"{word}: {count}")
poor: 118 bad: 94 terrible: 53 disappointed: 40 awful: 38 disappointing: 32 horrible: 20 sad: 10 miserable: 10 stupid: 2
from nltk.tokenize import word_tokenize
from collections import Counter
# Define a set of negative words to search for
negative_words = {"bad", "not good", "poor", "terrible", "disappointing", "disappointed", "awful", "horrible", "miserable", "stupid", "sad","nothappy"}
# Define a function to count the negative words in a review
def count_negative_words(review):
# Tokenize the words in the review and convert to lowercase
words = [word.lower() for word in word_tokenize(review)]
# Count the frequency of each negative word in the review
negative_word_counts = Counter([word for word in words if word in negative_words])
# Return the total count of negative words in the review
return sum(negative_word_counts.values())
# Create a new column in the data dataframe with the count of negative words for each review
data["negative_word_count"] = data["reviews"].apply(count_negative_words)
# Group the reviews by the "trip verification" column and calculate the mean negative word count for each group
grouped_data = data.groupby("trip verification")["negative_word_count"].mean()
# Print the mean negative word count for each group
print(grouped_data)
trip verification Not Verified | 0.378698 ✅ Trip Verified 0.425814 ❎ Not Verified 0.000000 Name: negative_word_count, dtype: float64
from nltk.tokenize import word_tokenize
from collections import Counter
# Define a set of negative words to search for
negative_words = {"bad", "not good", "poor", "terrible", "disappointing", "disappointed", "awful", "horrible", "miserable", "stupid", "sad", "nothappy"}
# Define a function to count the negative words in a review
def count_negative_words(review):
# Tokenize the words in the review and convert to lowercase
words = [word.lower() for word in word_tokenize(review)]
# Count the frequency of each negative word in the review
negative_word_counts = Counter([word for word in words if word in negative_words])
# Return the total count of negative words in the review
return sum(negative_word_counts.values())
# Create a new column in the data dataframe with the count of negative words for each review
data["negative_word_count"] = data["reviews"].apply(count_negative_words)
# Group the reviews by the "trip verification" column and calculate the total negative word count for each group
grouped_data = data.groupby("trip verification")["negative_word_count"].sum()
# Print the total negative word count for each group
print(grouped_data)
trip verification Not Verified | 64 ✅ Trip Verified 353 ❎ Not Verified 0 Name: negative_word_count, dtype: int64
# Define a set of negative words to search for
negative_words = {"bad", "not good", "poor", "terrible", "disappointing", "disappointed", "awful", "horrible", "miserable", "stupid", "sad","nothappy"}
# Define a function to count the negative words in a review
def count_negative_words(review):
# Tokenize the words in the review and convert to lowercase
words = [word.lower() for word in word_tokenize(review)]
# Count the frequency of each negative word in the review
negative_word_counts = Counter([word for word in words if word in negative_words])
# Return the total count of negative words in the review
return sum(negative_word_counts.values())
# Create a new column in the data dataframe with the count of negative words for each review
data["negative_word_count"] = data["reviews"].apply(count_negative_words)
# Group the reviews by the "trip verification" column and calculate the count of negative word for each group
grouped_data = data.groupby("trip verification")["negative_word_count"].sum()
# Convert the result to a DataFrame and plot a bar chart
ax = grouped_data.to_frame().plot(kind="bar", legend=False, color="blue")
# Set the chart title and axis labels
ax.set_title("Total Negative Word Counts by Trip Verification")
ax.set_xlabel("Trip Verification")
ax.set_ylabel("Total Negative Word Count")
# Show the chart
plt.show()
/Users/ashleyvizcaino/opt/anaconda3/lib/python3.9/site-packages/matplotlib/backends/backend_agg.py:240: RuntimeWarning: Glyph 9989 missing from current font. font.set_text(s, 0.0, flags=flags) /Users/ashleyvizcaino/opt/anaconda3/lib/python3.9/site-packages/matplotlib/backends/backend_agg.py:240: RuntimeWarning: Glyph 10062 missing from current font. font.set_text(s, 0.0, flags=flags) /Users/ashleyvizcaino/opt/anaconda3/lib/python3.9/site-packages/matplotlib/backends/backend_agg.py:203: RuntimeWarning: Glyph 9989 missing from current font. font.set_text(s, 0, flags=flags) /Users/ashleyvizcaino/opt/anaconda3/lib/python3.9/site-packages/matplotlib/backends/backend_agg.py:203: RuntimeWarning: Glyph 10062 missing from current font. font.set_text(s, 0, flags=flags)
# Create a pie chart with the percentage of negative reviews for each trip verification status
plt.pie(grouped_data.values, labels=grouped_data.index, autopct='%1.1f%%')
# Add title
plt.title('Percentage of Negative Reviews by Trip Verification Status')
# Show the chart
plt.show()
positive_words = ["good", "great", "excellent"]
positive_reviews = data.loc[(data["trip verification"] != "trip verified") & (data["reviews"].str.contains("|".join(positive_words))), "reviews"]
for review in positive_reviews:
print(reviews)
break
0 | Excellent service both on the ground and on...
1 | Good lounge at Cape Town. On time departur...
2 | A really excellent journey. Lounge not ove...
3 | This flight was one of the worst I have ev...
4 | It seems that there is a race to the bottom...
...
995 | Flew British Airways from London Heathrow t...
996 | Madrid to London. The main plus about this...
997 | London to Moscow. British Airways has down...
998 | Miami to London. My most recent BA experie...
999 | Gatwick to Barbados in December 2017. On a...
Name: reviews, Length: 1000, dtype: object
# Define a set of positive words to search for
positive_words = {"good", "excellent", "awesome", "fantastic", "great", "amazing", "superb", "wonderful", "happy", "satisfied"}
# Define a function to count the positive words in a review
def count_positive_words(reviews):
# Tokenize the words in the review and convert to lowercase
words = [word.lower() for word in word_tokenize(reviews)]
# Count the frequency of each positive word in the review
positive_word_counts = Counter([word for word in words if word in positive_words])
# Return the total count of positive words in the review
return sum(positive_word_counts.values())
# Create a new column in the data dataframe with the count of positive words for each review
data["positive_word_count"] = data["reviews"].apply(count_positive_words)
# Group the reviews by the "trip verification" column and calculate the mean positive word count for each group
grouped_data = data.groupby("trip verification")["positive_word_count"].mean()
# Print the mean positive word count for each group
print(grouped_data)
trip verification Not Verified | 0.792899 ✅ Trip Verified 0.884198 ❎ Not Verified 0.000000 Name: positive_word_count, dtype: float64
# Define a set of positive words to search for
positive_words = {"good", "great", "excellent", "awesome", "fantastic", "amazing", "love", "like", "enjoy", "happy", "satisfied"}
# Define a function to count the positive words in a review
def count_positive_words(reviews):
# Tokenize the words in the review and convert to lowercase
words = [word.lower() for word in word_tokenize(reviews)]
# Count the frequency of each positive word in the review
positive_word_counts = Counter([word for word in words if word in positive_words])
# Return the total count of positive words in the review
return sum(positive_word_counts.values())
# Create a new column in the data dataframe with the count of positive words for each review
data["positive_word_count"] = data["reviews"].apply(count_positive_words)
# Group the reviews by the "trip verification" column and calculate the count of positive words for each group
grouped_data = data.groupby("trip verification")["positive_word_count"].sum()
# Print the count of positive words for each group
print(grouped_data)
trip verification Not Verified | 161 ✅ Trip Verified 873 ❎ Not Verified 0 Name: positive_word_count, dtype: int64
import matplotlib.pyplot as plt
# Define a set of positive words to search for
positive_words = {"good", "great", "excellent", "fantastic", "awesome", "amazing", "wonderful", "happy"}
# Define a function to count the positive words in a review
def count_positive_words(review):
# Tokenize the words in the review and convert to lowercase
words = [word.lower() for word in word_tokenize(review)]
# Count the frequency of each positive word in the review
positive_word_counts = Counter([word for word in words if word in positive_words])
# Return the total count of positive words in the review
return sum(positive_word_counts.values())
# Create a new column in the data dataframe with the count of positive words for each review
data["positive_word_count"] = data["reviews"].apply(count_positive_words)
# Group the reviews by the "trip verification" column and calculate the count of positive word occurrences for each group
grouped_data = data.groupby("trip verification")["positive_word_count"].sum()
# Plot the bar chart
fig, ax = plt.subplots()
grouped_data.plot(kind="bar", ax=ax)
# Set the chart title and axis labels
ax.set_title("Count of Positive Reviews by Trip Verification")
ax.set_xlabel("Trip Verification")
ax.set_ylabel("Count")
# Show the chart
plt.show()
import matplotlib.pyplot as plt
# Define a set of positive words to search for
positive_words = {"good", "great", "excellent", "amazing", "wonderful", "fantastic", "terrific", "awesome", "satisfying", "pleasing", "enjoyable"}
# Define a function to count the positive words in a review
def count_positive_words(reviews):
# Tokenize the words in the review and convert to lowercase
words = [word.lower() for word in word_tokenize(reviews)]
# Count the frequency of each positive word in the review
positive_word_counts = Counter([word for word in words if word in positive_words])
# Return the total count of positive words in the review
return sum(positive_word_counts.values())
# Create a new column in the data dataframe with the count of positive words for each review
data["positive_word_count"] = data["reviews"].apply(count_positive_words)
# Group the reviews by the "trip verification" column and calculate the total positive word count for each group
grouped_data = data.groupby("trip verification")["positive_word_count"].sum()
# Plot a pie chart of the positive review counts
plt.pie(grouped_data, labels=grouped_data.index, autopct='%1.1f%%')
plt.title("Positive Review Counts by Trip Verification")
plt.show()
# Define a set of positive words to search for
positive_words = {"good", "great", "excellent", "amazing", "wonderful", "fantastic", "terrific", "awesome", "satisfying", "pleasing", "enjoyable"}
# Define a function to count the positive words in a review
def count_positive_words(review):
# Tokenize the words in the review and convert to lowercase
words = [word.lower() for word in word_tokenize(review)]
# Count the frequency of each positive word in the review
positive_word_counts = Counter([word for word in words if word in positive_words])
# Return the total count of positive words in the review
return sum(positive_word_counts.values())
# Create a new column in the data dataframe with the count of positive words for each review
data["positive_word_count"] = data["reviews"].apply(count_positive_words)
# Find the countries with the most positive reviews
positive_reviews = data[data["positive_word_count"] > 0]
positive_reviews_by_country = positive_reviews.groupby("countries")["positive_word_count"].count()
best_countries = positive_reviews_by_country.sort_values(ascending=False)
# Print the best countries
print(best_countries.head(10))
countries Singapore 10 South Africa 6 Qatar 4 Australia 4 Barbados 4 France 4 Egypt 3 Mexico 3 Japan 3 Canada 3 Name: positive_word_count, dtype: int64
import plotly.express as px
# Replace "best_countries" with your own variable name for the grouped data
fig = px.choropleth(best_countries, locations=best_countries.index, locationmode="country names", color="positive_word_count",
hover_name=best_countries.index, projection="natural earth", title="Countries with the most positive reviews")
fig.show()
# Define a set of negative words to search for
negative_words = {"bad", "not good", "poor", "terrible", "disappointing", "disappointed", "awful", "horrible", "miserable", "stupid", "sad", "nothappy"}
# Define a function to count the negative words in a review
def count_negative_words(review):
# Tokenize the words in the review and convert to lowercase
words = [word.lower() for word in word_tokenize(review)]
# Count the frequency of each negative word in the review
negative_word_counts = Counter([word for word in words if word in negative_words])
# Return the total count of negative words in the review
return sum(negative_word_counts.values())
# Create a new column in the data dataframe with the count of negative words for each review
data["negative_word_count"] = data["reviews"].apply(count_negative_words)
# Find the countries with the most negative reviews
negative_reviews = data[data["negative_word_count"] > 0]
negative_reviews_by_country = negative_reviews.groupby("countries")["negative_word_count"].count()
worst_countries = negative_reviews_by_country.sort_values(ascending=False)
# Print the worst countries
print(worst_countries)
countries Singapore 11 France 5 Japan 4 Barbados 4 Canada 4 South Africa 3 Jersey 3 Australia 2 Qatar 2 Egypt 2 Mexico 2 Mauritius 2 India 2 Spain 1 New Zealand 1 Malaysia 1 Iceland 1 Italy 1 Bahrain 1 Greece 1 Gibraltar 1 Ghana 1 Germany 1 Cyprus 1 Bulgaria 1 Bermuda 1 Belgium 1 Thailand 1 Name: negative_word_count, dtype: int64
import plotly.express as px
# Create a new dataframe with the count of negative reviews by country
negative_reviews_count = negative_reviews_by_country.reset_index(name='count')
# Create a choropleth map based on the negative reviews count by country
fig = px.choropleth(negative_reviews_count, locations="countries", locationmode='country names', color="count",
title="Negative Reviews by Country", color_continuous_scale="Reds")
fig.show()